Setup

Load R libraries

library(data.table)
library(ggplot2)
library(ggpubr)
library(tidyr)
library(limma)
library(biomaRt)
library(fgsea)
library(goseq)

theme_set(theme_classic())
graph_weight = params$graph_weight
graph_weight
## [1] "1.0"

Check enrichment of gene sets

Read in gene info and gene set assignments

cell_type_name = "CD8T"
file_tag = sprintf("%s_%s", cell_type_name, graph_weight)

assayed_genes = scan(sprintf("output/gene_list_%s.txt", file_tag), 
                     what = character(), sep="\n")

gene_sets = scan(sprintf("output/name_s_%s.txt", file_tag), 
                 what = character(), sep="\n")

gene_sets = sapply(gene_sets, strsplit, split=",")
n_genes   = sapply(gene_sets, length)
names(n_genes) = NULL
summary(n_genes)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   15.00   19.75   20.00   20.00   21.00   22.00
length(n_genes)
## [1] 40
sort(n_genes)
##  [1] 15 16 18 19 19 19 19 19 19 19 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
## [26] 20 21 21 21 21 21 21 21 21 21 21 22 22 22 22
sum(n_genes)
## [1] 800

Find gene symbols

Find gene symbols from bioMart.

All the gene symbols that can be found in bioMart are consistent with what we have. So no need to run it.

ensembl = useMart("ensembl", dataset = "hsapiens_gene_ensembl")

gene_BM = getBM(attributes = c("hgnc_symbol", "external_gene_name"), 
                filters = "external_gene_name", 
                values = assayed_genes, 
                mart = ensembl)
length(assayed_genes)
dim(gene_BM)
gene_BM[1:2,]

table(assayed_genes %in% gene_BM$external_gene_name)

t1 = table(gene_BM$external_gene_name)
dup = names(t1)[t1 > 1]
gene_BM[gene_BM$external_gene_name %in% dup,]

table(gene_BM$hgnc_symbol == gene_BM$external_gene_name)
w2kp = which(gene_BM$hgnc_symbol != gene_BM$external_gene_name)
gene_BM[w2kp,]

Find gene symbols using the alias2Symbol function from limma.

a2s = rep(NA, length(assayed_genes))
for(i in 1:length(assayed_genes)){
  gi = assayed_genes[i]
  ai = alias2Symbol(gi)
  if(length(ai) > 1){
    print(gi)
    print(ai)
  }
  a2s[i] = ai[1]
}
## [1] "C7orf55"
## [1] "FMC1-LUC7L2" "FMC1"       
## [1] "MARS"
## [1] "MARS1" "SLA2" 
## [1] "QARS"
## [1] "EPRS1" "QARS1"
## [1] "APITD1"
## [1] "CENPS-CORT" "CENPS"     
## [1] "HIST1H2BC"
## [1] "H2BC5" "H2BC4"
table(is.na(a2s))
## 
## FALSE  TRUE 
##  1120   139
table(a2s == assayed_genes, useNA = 'ifany')
## 
## FALSE  TRUE  <NA> 
##    71  1049   139
gene_info = data.table(sym_in_data = assayed_genes, sym_limma = a2s)

gene_info[sym_in_data != sym_limma,]
##      sym_in_data   sym_limma
##  1:        ZNRD1      POLR1H
##  2:      C2orf43        LDAH
##  3:       FAM45A     DENND10
##  4:        MTERF      MTERF1
##  5:        NARG2        ICE2
##  6:     C6orf203      MTRES1
##  7:       HRSP12        RIDA
##  8:        FOPNL       CEP20
##  9:         WARS       WARS1
## 10:         LINS       LINS1
## 11:       RRNAD1    METTL25B
## 12:      FAM122B      PABIR2
## 13:       WDYHV1       NTAQ1
## 14:    HIST1H2BD       H2BC5
## 15:       AGPAT6       GPAT4
## 16:     C1orf123        CZIB
## 17:     KIAA1841       SANBR
## 18:      C2orf44        WDCP
## 19:      TMEM155      SMIM43
## 20:      C7orf55 FMC1-LUC7L2
## 21:      C9orf89      CARD19
## 22:     C11orf82       DDIAS
## 23:     XRCC6BP1       ATP23
## 24:         MARS       MARS1
## 25:     C19orf55     PROSER3
## 26:    HIST1H2AE       H2AC8
## 27:         QARS       EPRS1
## 28:       APITD1  CENPS-CORT
## 29:      CCDC101       SGF29
## 30:        PDDC1       GATD1
## 31:      EFCAB4A     CRACR2B
## 32:      EIF2S3L     EIF2S3B
## 33:    HIST1H2BC       H2BC5
## 34:          WRB        GET1
## 35:     TMEM194B       NEMP2
## 36:     HIST1H4C        H4C3
## 37:     HIST1H4J       H4C11
## 38:       ZNF720      KRBOX5
## 39:    HIST1H2BH       H2BC9
## 40:       FAM49A       CYRIA
## 41:        TTC37       SKIC3
## 42:      C1orf85        GLMP
## 43:     C9orf114      SPOUT1
## 44:   HIST2H2AA4      H2AC19
## 45:    HIST2H2BF      H2BC18
## 46:    C10orf128     TMEM273
## 47:     TCTEX1D2     DYNLT2B
## 48:     C17orf89     NDUFAF8
## 49:        PHBP3      PHB1P3
## 50:      SEPT7P7   SEPTIN7P7
## 51:    LINC00493      SMIM26
## 52:    HIST1H2BN      H2BC15
## 53:       ATP5J2      ATP5MF
## 54:        AKAP2  PALM2AKAP2
## 55:        WDR65      CFAP57
## 56:    SRP14-AS1    SRP14-DT
## 57: CTC-436P18.1  SMIM15-AS1
## 58:     HIST1H3G        H3C8
## 59:     EFTUD1P1      EFL1P1
## 60:     ATP5A1P3   ATP5F1AP3
## 61:     CCDC109B        MCUB
## 62:      C1orf63       RSRP1
## 63:       TMEM66       SARAF
## 64:       PCNXL2       PCNX2
## 65:         ACRC        GCNA
## 66:       AMICA1        JAML
## 67:       PRMT10       PRMT9
## 68:      FAM102A       EEIG1
## 69:         SELM     SELENOM
## 70:       NBPF16      NBPF15
## 71:       RPL9P9      RPL9P8
##      sym_in_data   sym_limma
gene_info[, gene_symbol := sym_in_data]
gene_info[which(sym_in_data != sym_limma), gene_symbol := sym_limma]

dim(gene_info)
## [1] 1259    3
gene_info[1:5,]
##    sym_in_data sym_limma gene_symbol
## 1:    C1orf112  C1orf112    C1orf112
## 2:      MAD1L1    MAD1L1      MAD1L1
## 3:        ICA1      ICA1        ICA1
## 4:     NDUFAF7   NDUFAF7     NDUFAF7
## 5:         ST7       ST7         ST7
t1 = table(gene_info$gene_symbol)
table(t1)
## t1
##    1    2 
## 1255    2
gene_info[gene_symbol %in% names(t1)[t1 == 2],]
##    sym_in_data sym_limma gene_symbol
## 1:   HIST1H2BD     H2BC5       H2BC5
## 2:   HIST1H2BC     H2BC5       H2BC5
## 3:      RPL9P8    RPL9P8      RPL9P8
## 4:      RPL9P9    RPL9P8      RPL9P8
gene_info[sym_in_data == "HIST1H2BC", gene_symbol:="H2BC4"]
gene_info[sym_in_data == "RPL9P9", gene_symbol:="RPL9P9"]

Prepare gene set information

Gene set annotations (by gene symbols) were downloaded from MSigDB website.

gmtfile = list()
gmtfile[["reactome"]] = "../Annotation/c2.cp.reactome.v2023.2.Hs.symbols.gmt"
gmtfile[["go_bp"]]    = "../Annotation/c5.go.bp.v2023.2.Hs.symbols.gmt"
gmtfile[["immune"]]   = "../Annotation/c7.all.v2023.2.Hs.symbols.gmt"

pathways = list()
for(k1 in names(gmtfile)){
  pathways[[k1]] = gmtPathways(gmtfile[[k1]])
}

names(pathways)
## [1] "reactome" "go_bp"    "immune"
sapply(pathways, length)
## reactome    go_bp   immune 
##     1692     7647     5219

Filter gene sets for size between 10 and 500.

lapply(pathways, function(v){
  quantile(sapply(v, length), probs = seq(0, 1, 0.1), na.rm = TRUE)
})
## $reactome
##     0%    10%    20%    30%    40%    50%    60%    70%    80%    90%   100% 
##    5.0    7.0    9.0   12.0   17.0   23.0   31.0   44.0   71.8  120.9 1463.0 
## 
## $go_bp
##     0%    10%    20%    30%    40%    50%    60%    70%    80%    90%   100% 
##    5.0    6.0    8.0   10.0   14.0   19.0   29.0   46.0   80.8  183.0 1966.0 
## 
## $immune
##   0%  10%  20%  30%  40%  50%  60%  70%  80%  90% 100% 
##    5  162  193  197  199  199  200  200  200  200 1992
for(k1 in names(pathways)){
  p1 = pathways[[k1]]
  pathways[[k1]] = p1[sapply(p1, length) %in% 10:500]
}

Conduct enrichment analysis

dim(gene_info)
## [1] 1259    3
gene_info[1:2,]
##    sym_in_data sym_limma gene_symbol
## 1:    C1orf112  C1orf112    C1orf112
## 2:      MAD1L1    MAD1L1      MAD1L1
max_n2kp = 10

goseq_res = NULL

for(k in 1:length(gene_sets)){
  if(length(gene_sets[[k]]) < 10) { next }
  
  print(k)
  set_k = paste0("set_", k)
  print(gene_sets[[k]])
  
  genes = gene_info$sym_in_data %in% gene_sets[[k]]
  names(genes) = gene_info$gene_symbol
  table(genes)
  
  pwf = nullp(genes, "hg38", "geneSymbol")
  
  for(k1 in names(pathways)){
    p1 = pathways[[k1]]
    res1 = goseq(pwf, "hg38", "geneSymbol", 
                 gene2cat=goseq:::reversemapping(p1))
    res1$FDR  = p.adjust(res1$over_represented_pvalue, method="BH")
    
    nD = sum(res1$FDR < 0.05)
    
    if(nD > 0){
      res1 = res1[order(res1$FDR),][1:min(nD, max_n2kp),]
      res1$category = gsub("REACTOME_|GOBP_", "", res1$category)
      res1$category = gsub("_", " ", res1$category)
      res1$category = tolower(res1$category)
      res1$category = substr(res1$category, start=1, stop=120)
      goseq_res[[set_k]][[k1]] = res1
    }
  }
}
## [1] 1
##  [1] "MRPS33"     "MTFMT"      "CPD"        "CUL9"       "CCR2"      
##  [6] "PARP2"      "C11orf1"    "C7orf55"    "GLB1"       "ETFDH"     
## [11] "MRPL48"     "HIST1H2BC"  "ZNF765"     "TTC37"      "HIST2H2AA4"
## [16] "HIST1H2BN"

## [1] 2
##  [1] "PTP4A1"        "FAHD2A"        "MYB"           "DTD2"         
##  [5] "DPH6"          "NARF"          "ZBTB39"        "FBXO22"       
##  [9] "BOLA1"         "RP11-383G10.3" "UBA52P6"       "RP11-365H23.1"
## [13] "RPS27AP2"      "ADH5P4"        "PTGES3P1"      "AC007041.2"   
## [17] "RP11-382J24.2" "LEF1"          "SIK1"          "MTND4P12"     
## [21] "RP11-727F15.9"

## [1] 3
##  [1] "REC8"         "DUS4L"        "GALK1"        "IFNG"         "GNPTAB"      
##  [6] "MCM9"         "GORASP1"      "STK25"        "STYXL1"       "TMEM218"     
## [11] "LRRC28"       "SPATA13"      "EDARADD"      "PIK3IP1"      "ARRDC2"      
## [16] "SLC38A2"      "AMICA1"       "PFKFB3"       "RBKS"         "RP11-44N11.1"

## [1] 4
##  [1] "SH3BP2"  "SNX14"   "TAF10"   "B2M"     "AFMID"   "TMSB4X"  "IFRD1"  
##  [8] "RGPD5"   "CD44"    "ARHGEF1" "STK17B"  "RGCC"    "EGR1"    "FOSB"   
## [15] "JUND"    "BTG2"    "CXCR5"   "TTC3"    "RGPD6"   "MALAT1"

## [1] 5
##  [1] "CD200"         "RP3-324O17.4"  "ZNF419"        "ZNF684"       
##  [5] "IL10"          "RHEBL1"        "C19orf55"      "ZNF93"        
##  [9] "PIGW"          "C2orf76"       "ANAPC10P1"     "RSC1A1"       
## [13] "RP11-247I13.6" "CTC-338M12.4"  "RP11-262H14.3" "CTB-4E7.1"    
## [17] "HMGB1P24"      "AP000462.1"    "RP11-307P22.1" "RP11-638I2.10"
## [21] "RP11-1012A1.7" "TRAV30"

## [1] 6
##  [1] "PIM2"          "IFI6"          "USF1"          "IFI27"        
##  [5] "RP11-23P13.6"  "ZNF80"         "EIF2S3L"       "ABHD16B"      
##  [9] "ZNF649"        "SGMS1"         "TRBV20-1"      "TRAV19"       
## [13] "TRAV24"        "RP4-800M22.1"  "RPS11P5"       "RP11-288E14.2"
## [17] "AC144530.1"    "RP11-693N9.2"  "EEF1A1P10"     "RP11-705C15.5"

## [1] 7
##  [1] "VPS37A"        "THAP2"         "TRAV16"        "TRAV21"       
##  [5] "RP1-102E24.1"  "RP3-340B19.5"  "RP11-2F9.3"    "RPL19P12"     
##  [9] "RP11-270C12.3" "PGAM4"         "GS1-124K5.2"   "RPL19P21"     
## [13] "AC090804.1"    "RP11-446E9.1"  "CTD-2301A4.3"  "CTB-33G10.1"  
## [17] "RNF138P1"      "MIR146A"       "HSPA8P5"       "RP11-511H9.4" 
## [21] "RP11-304L19.5"

## [1] 8
##  [1] "CPQ"           "RCAN3"         "C1orf63"       "NBPF14"       
##  [5] "PCNXL2"        "NBPF10"        "AC018720.10"   "LDOC1"        
##  [9] "TSHZ2"         "NBPF16"        "HLA-J"         "AC084018.1"   
## [13] "HCG4P5"        "RPL10P3"       "HLA-W"         "LINC00674"    
## [17] "PSMD6-AS2"     "ZNF10"         "SLC7A5P1"      "CTD-2017D11.1"

## [1] 9
##  [1] "RGS1"          "GMPR2"         "PRMT7"         "TRANK1"       
##  [5] "TRAPPC11"      "CXCR6"         "COA3"          "CMC1"         
##  [9] "MT1X"          "CCL3L1"        "TECPR1"        "TRGC2"        
## [13] "PET100"        "PVT1"          "CCL3L3"        "RP11-345J4.6" 
## [17] "EIF1"          "RP11-572P18.1" "RPS3AP6"       "CTD-2031P19.4"

## [1] 10
##  [1] "ZNF85"        "MTHFS"        "SCRN2"        "FAM122B"      "TMEM155"     
##  [6] "LAIR2"        "ZBTB8OS"      "ASB8"         "GEN1"         "ARHGAP11B"   
## [11] "TMEM194B"     "HIST1H4C"     "ZNF28"        "CRYGS"        "AC098614.2"  
## [16] "BDH2P1"       "GAPDHP2"      "RP11-265N6.2" "NPAT"         "TCEAL3"      
## [21] "TMEM9B-AS1"

## [1] 11
##  [1] "REXO1"   "POP4"    "C3orf14" "GALE"    "TMEM214" "CCDC77"  "CKS2"   
##  [8] "UROD"    "CCNB1"   "FHOD1"   "RPP30"   "MPPE1"   "GALK2"   "CCNB2"  
## [15] "POP5"    "RUFY2"   "MYCBP"   "KIR3DL2"

## [1] 12
##  [1] "CDKN3"      "TIMM9"      "TXNDC17"    "FIGNL1"     "TIMM10"    
##  [6] "TROAP"      "GEMIN6"     "NDUFAF6"    "CCDC28B"    "PAQR4"     
## [11] "C11orf82"   "ZFYVE19"    "UBE2C"      "TYMS"       "SESTD1"    
## [16] "NCKIPSD"    "MDP1"       "HIST1H3G"   "PMF1-BGLAP"

## [1] 13
##  [1] "SLAMF8"        "TNFAIP8L2"     "EFCAB7"        "RPSAP54"      
##  [5] "RP11-12M9.4"   "RPS19P1"       "CALM2P3"       "ARPC3P1"      
##  [9] "PRDX3P1"       "SKP1P1"        "AC093391.2"    "AP000476.1"   
## [13] "RPSAP4"        "RPS19P3"       "AC018462.3"    "GAPDHP60"     
## [17] "RP11-402J6.3"  "RP13-638C3.4"  "RP11-209D14.4" "CTB-52I2.4"   
## [21] "RP11-282O18.6"

## [1] 14
##  [1] "GCFC2"       "MFAP3"       "CD84"        "CDKL1"       "XAF1"       
##  [6] "MFSD9"       "CASP1"       "TMEM19"      "TATDN1"      "FCRL3"      
## [11] "ZNF720"      "SH2D1B"      "CPT1B"       "UBXN2B"      "LINC00426"  
## [16] "RBBP4P1"     "SNORD3A"     "AC024592.12" "MS4A1"       "PLA2G4B"

## [1] 15
##  [1] "GPN2"          "ZBED2"         "PLA2G6"        "TXK"          
##  [5] "CLDND1"        "MAN1C1"        "RASGEF1B"      "FRMD4A"       
##  [9] "POLR2J3"       "YPEL2"         "DENND5A"       "RP11-514P8.6" 
## [13] "RP11-91K8.4"   "MTND1P23"      "ACTBP12"       "RP1-187B23.1" 
## [17] "LINC00944"     "RP11-335G20.7" "RP11-1000B6.3" "KCNQ1OT1"

## [1] 16
##  [1] "RNF10"          "BCL3"           "DERL2"          "MBNL3"         
##  [5] "FMR1"           "METTL8"         "KATNBL1"        "TPGS2"         
##  [9] "TMOD3"          "LINS"           "LPP"            "KBTBD2"        
## [13] "KATNA1"         "C17orf89"       "PPIL3"          "ATP5J2"        
## [17] "RP11-386G11.10" "CLK1"           "ABLIM1"         "TC2N"

## [1] 17
##  [1] "MED24"   "TM7SF3"  "POLD3"   "DUSP12"  "PLA2G15" "RASSF4"  "NR2C1"  
##  [8] "NQO2"    "ASNSD1"  "TBCD"    "UCK2"    "PPP4R1"  "NTAN1"   "PPARA"  
## [15] "ZNF511"  "TRGV10"  "TREX1"   "MRPL33"  "ABHD14A" "GGT7"

## [1] 18
##  [1] "DYRK4"         "NSMAF"         "NHP2P1"        "IL18RAP"      
##  [5] "FAM45A"        "MTERF"         "NUDT7"         "AMDHD2"       
##  [9] "NAALADL1"      "EFCAB4A"       "WDR53"         "ZNF749"       
## [13] "TRGV5"         "RP11-247I13.3" "RP11-119F19.2" "HNRNPKP2"     
## [17] "PA2G4P4"       "EEF1A1P14"     "TRGV7"         "KB-1507C5.2"  
## [21] "TRAV1-2"       "CTB-31O20.3"

## [1] 19
##  [1] "PUS7L"    "KHDC1"    "SFR1"     "KIAA1841" "LRRC45"   "NME6"    
##  [7] "SWI5"     "ZNF613"   "LCORL"    "LRTOMT"   "CCDC30"   "ENTPD5"  
## [13] "METTL6"   "MYL5"     "PIN4P1"   "SAPCD1"   "GK3P"     "RAB1C"   
## [19] "GOT2P3"   "ZEB1-AS1" "RPL13P5"

## [1] 20
##  [1] "PRSS23"  "AMFR"    "MRPL55"  "RPS17"   "TNF"     "PPP2R5C" "RPL34"  
##  [8] "RPS12"   "RPL21"   "RPL5"    "BTG1"    "PDCD4"   "ZFP36L2" "RPL9"   
## [15] "RPS18"

## [1] 21
##  [1] "CCL3"     "GLT8D1"   "TBPL1"    "FAR2"     "GZMB"     "MAN2B1"  
##  [7] "SLC12A4"  "MT2A"     "ANXA1"    "IRF4"     "GCHFR"    "GTF2A2"  
## [13] "C1orf123" "EXTL2"    "METTL18"  "PRF1"     "NCR3"     "UBE2D3P2"
## [19] "NR4A2"    "CD55"

## [1] 22
##  [1] "MAP3K13"        "ZNF268"         "FAH"            "EGR2"          
##  [5] "TMSB15B"        "MEI1"           "ADCK5"          "SNHG11"        
##  [9] "ZNF257"         "ZNF525"         "LYPLA1P3"       "ZNF826P"       
## [13] "LINC00493"      "SNRPEP4"        "FAM200B"        "RP11-552M11.4" 
## [17] "SRP14-AS1"      "TPM3P6"         "USP30-AS1"      "RP11-1094M14.7"
## [21] "RIMKLB"

## [1] 23
##  [1] "OCEL1"        "DHDDS"        "PGPEP1"       "RRNAD1"       "RNF25"       
##  [6] "SLC39A13"     "RPSAP47"      "ZNF544"       "TRAV8-2"      "RP4-694B14.5"
## [11] "DNASE1"       "PTGES3P2"     "CCT8P1"       "RP11-350G8.3" "SEPT7P7"     
## [16] "RP11-782C8.1" "TSEN15P1"     "FAM21FP"      "ETF1P2"       "SF3A3P2"     
## [21] "RP4-739H11.4"

## [1] 24
##  [1] "STAU2"   "P2RX5"   "ERGIC2"  "DHPS"    "THOC5"   "DDX49"   "SRM"    
##  [8] "IMP4"    "MAP3K6"  "LIMD1"   "GLUD1"   "CXXC1"   "MITD1"   "MARS"   
## [15] "NT5DC1"  "DUSP28"  "PIK3R4"  "TNRC6C"  "LMO7"    "PPP1R10"

## [1] 25
##  [1] "CRTAM"      "HAVCR1"     "TNFSF10"    "PTGER2"     "IFI44L"    
##  [6] "IFI44"      "APH1B"      "SPON2"      "MT1E"       "HOPX"      
## [11] "TRBV9"      "TRAV13-1"   "MIR155HG"   "AC092580.4" "RN7SL749P" 
## [16] "AC005329.7" "RNU1-125P"  "AC006483.1" "MIR142"     "AC006129.4"

## [1] 26
##  [1] "NOP16"    "WDR3"     "GTPBP10"  "NOP2"     "DKC1"     "MTO1"    
##  [7] "BCAS3"    "PELP1"    "UTP23"    "PHAX"     "ZFP62"    "ZNF431"  
## [13] "ZNF770"   "C9orf114" "ZBTB48"   "RASGRF2"  "SGSM2"    "FAM102A" 
## [19] "DNHD1"

## [1] 27
##  [1] "C14orf93"      "MS4A6A"        "TMEM138"       "TRPT1"        
##  [5] "GSTM4"         "TRDV1"         "RP13-383K5.4"  "CD27-AS1"     
##  [9] "TSTD1"         "RNU2-2P"       "AC012318.3"    "RP11-773D16.1"
## [13] "RP4-742C19.12" "TRGV2"         "MAPKAPK5-AS1"  "AC104820.2"   
## [17] "LINC00484"     "RP11-305L7.3"  "RP11-416A17.6" "CTD-2521M24.9"

## [1] 28
##  [1] "TNFRSF9"  "PON2"     "KLRB1"    "TEP1"     "ACTR10"   "KLRC1"   
##  [7] "BORA"     "SPPL2A"   "PANK4"    "MGAT4B"   "FCGR3B"   "SLC38A9" 
## [13] "WRB"      "CCL4L2"   "FCGR3A"   "MSH5"     "CCL4L1"   "KLRC2"   
## [19] "TCTEX1D2"

## [1] 29
##  [1] "PECR"         "XCL1"         "XCL2"         "CX3CR1"       "GTSF1"       
##  [6] "ZNF683"       "FCRL6"        "C10orf128"    "TRBV4-2"      "TRAV14DV4"   
## [11] "TRAV29DV5"    "TRAV35"       "KANSL1-AS1"   "RP1-182O16.1" "PATL2"       
## [16] "AKAP2"        "RPL37P23"     "RP11-81H14.2" "AC139149.1"   "TPMTP1"      
## [21] "LUZP6"

## [1] 30
##  [1] "ZNRD1"   "MICALL1" "CAPN15"  "POLR2I"  "CD86"    "MTFR1L"  "IFIT3"  
##  [8] "IFIT2"   "UXT"     "HRSP12"  "RSAD2"   "CLSTN3"  "KSR1"    "MED11"  
## [15] "POLR2H"  "IL15"    "TMEM107" "IFIT1"   "TRGV3"

## [1] 31
##  [1] "MDGA1"         "CDCA7L"        "ZNF816"        "RPS12P26"     
##  [5] "TCF7"          "CCR7"          "C1orf162"      "LDLRAP1"      
##  [9] "PLXDC1"        "FILIP1L"       "TMEM150A"      "CD248"        
## [13] "POU6F1"        "TRABD2A"       "MIR4461"       "CTD-3092A11.1"
## [17] "RP11-632K20.7" "CHRM3-AS2"     "RP11-285F7.2"  "WHAMMP2"      
## [21] "RP1-313I6.12"

## [1] 32
##  [1] "PTPN18"        "TIPIN"         "NSMCE4A"       "NCOA7"        
##  [5] "AHI1"          "APTX"          "RNF121"        "HIRIP3"       
##  [9] "MFSD6"         "ALG8"          "PRIMPOL"       "CTSB"         
## [13] "GNGT2"         "KIR3DL1"       "TBCA"          "MZT2A"        
## [17] "DALRD3"        "TNFRSF18"      "KIR2DS4"       "RP11-271C24.3"

## [1] 33
##  [1] "PDE6B"         "ZNF678"        "RP4-728D4.2"   "RP11-493L12.6"
##  [5] "DGKA"          "NEK1"          "FBXO32"        "EPB41"        
##  [9] "PLGLB1"        "RPS26P6"       "RPS3AP26"      "RPL9P8"       
## [13] "RPL3P4"        "AP001468.1"    "RPL9P9"        "RPL9P7"       
## [17] "ATP1B3-AS1"    "RP13-488H8.1"  "RP11-320M16.1"

## [1] 34
##  [1] "PMM1"           "TFDP2"          "C2orf43"        "CCRL2"         
##  [5] "NARG2"          "TMEM241"        "SLC2A8"         "SLC35A5"       
##  [9] "FUOM"           "DRAM2"          "LZTFL1"         "TRIM68"        
## [13] "TMEM134"        "SMIM19"         "SLC9A9"         "TRDC"          
## [17] "RP11-254B13.4"  "BHLHE40-AS1"    "RP11-844P9.3"   "RP11-265N6.3"  
## [21] "MIR3615"        "RP11-686D22.10"

## [1] 35
##  [1] "CHI3L2"        "NT5C2"         "WARS"          "TMPRSS3"      
##  [5] "DNAJC18"       "PSMG1"         "FAM49A"        "PSMA6P1"      
##  [9] "AC009403.2"    "RNU2-63P"      "CDC42P1"       "AC002331.1"   
## [13] "RP11-32B5.1"   "RP3-395M20.8"  "RN7SL56P"      "GMPSP1"       
## [17] "RPPH1"         "MIR3661"       "AL161784.1"    "CTD-3214H19.4"
## [21] "CTD-2545M3.2"  "RP11-297D21.4"

## [1] 36
##  [1] "C9orf89"      "CMAHP"        "RASGRP2"      "MGAT4A"       "HECA"        
##  [6] "NR4A3"        "FAM117B"      "PLAC8"        "ACRC"         "TIPARP"      
## [11] "PRMT10"       "CHD2"         "PLK3"         "ZBTB20"       "ANKRD37"     
## [16] "SELL"         "AC021593.1"   "LTB"          "RP11-747H7.3" "RP11-434H6.7"

## [1] 37
##  [1] "NDUFAF7"    "SMG6"       "IL4R"       "USP48"      "WDR7"      
##  [6] "UTP6"       "EMG1"       "L3HYPDH"    "DEF8"       "SEC24D"    
## [11] "AGPAT6"     "BAP1"       "GNG4"       "CHAMP1"     "MBD5"      
## [16] "CATSPER2P1" "FOXP1"      "DHRS3"      "SATB1"

## [1] 38
##  [1] "RABGAP1"     "NTHL1"       "GBA2"        "DHRS12"      "LIN7B"      
##  [6] "ILVBL"       "LIG1"        "SNX8"        "PRR4"        "RCOR3"      
## [11] "NUP85"       "CHTF18"      "FOPNL"       "TTLL4"       "COG5"       
## [16] "KNTC1"       "IFRD2"       "RP11-33B1.1" "PMEPA1"      "MPP7"

## [1] 39
##  [1] "WDR13"      "GABPB1"     "SLC11A2"    "LOXL3"      "FANCL"     
##  [6] "ID3"        "IRF8"       "PMAIP1"     "PLIN2"      "CETN3"     
## [11] "MTHFD2L"    "NFU1"       "PCGF5"      "CADM1"      "BTLA"      
## [16] "TDRD7"      "HIST1H4J"   "FANCG"      "AL358333.1"

## [1] 40
##  [1] "CAPN12"       "TRAV12-2"     "PLXND1"       "APBA2"        "UAP1"        
##  [6] "TMEM66"       "AOAH"         "CDC42EP3"     "ZNF589"       "NPIPB3"      
## [11] "NPIPB4"       "RPS26"        "FAM169A"      "KLRK1"        "MTND2P28"    
## [16] "CTD-2328D6.1" "RPL41"        "NPIPB5"       "MTATP6P1"     "QRSL1P3"     
## [21] "RP11-51J9.5"

for(n1 in names(goseq_res)){
  k = as.numeric(gsub("set_", "", n1))
  print(n1)
  print(gene_sets[[k]])
  print(goseq_res[[n1]])

}
## [1] "set_3"
##  [1] "REC8"         "DUS4L"        "GALK1"        "IFNG"         "GNPTAB"      
##  [6] "MCM9"         "GORASP1"      "STK25"        "STYXL1"       "TMEM218"     
## [11] "LRRC28"       "SPATA13"      "EDARADD"      "PIK3IP1"      "ARRDC2"      
## [16] "SLC38A2"      "AMICA1"       "PFKFB3"       "RBKS"         "RP11-44N11.1"
## $go_bp
##                         category over_represented_pvalue
## 241 carbohydrate phosphorylation            6.272519e-07
##     under_represented_pvalue numDEInCat numInCat         FDR
## 241                        1          4        5 0.002563578
## 
## [1] "set_11"
##  [1] "REXO1"   "POP4"    "C3orf14" "GALE"    "TMEM214" "CCDC77"  "CKS2"   
##  [8] "UROD"    "CCNB1"   "FHOD1"   "RPP30"   "MPPE1"   "GALK2"   "CCNB2"  
## [15] "POP5"    "RUFY2"   "MYCBP"   "KIR3DL2"
## $go_bp
##                   category over_represented_pvalue under_represented_pvalue
## 3987 trna 5 leader removal            8.087298e-06                1.0000000
## 3698  rna 5 end processing            2.995770e-05                0.9999999
## 3699           rna capping            2.995770e-05                0.9999999
## 3986 trna 5 end processing            2.995770e-05                0.9999999
##      numDEInCat numInCat        FDR
## 3987          3        3 0.03060928
## 3698          3        4 0.03060928
## 3699          3        4 0.03060928
## 3986          3        4 0.03060928
## 
## $immune
##                                             category over_represented_pvalue
## 3317 gse36826 wt vs il1r ko skin staph aureus inf up             8.40456e-06
##      under_represented_pvalue numDEInCat numInCat        FDR
## 3317                0.9999998          5       17 0.04285485
## 
## [1] "set_20"
##  [1] "PRSS23"  "AMFR"    "MRPL55"  "RPS17"   "TNF"     "PPP2R5C" "RPL34"  
##  [8] "RPS12"   "RPL21"   "RPL5"    "BTG1"    "PDCD4"   "ZFP36L2" "RPL9"   
## [15] "RPS18"  
## $reactome
##                                                        category
## 890 srp dependent cotranslational protein targeting to membrane
## 256                           eukaryotic translation initiation
## 734           response of eif2ak4 gcn2 to amino acid deficiency
## 255                           eukaryotic translation elongation
## 691                 regulation of expression of slits and robos
## 547                                 nonsense mediated decay nmd
## 808                                 selenoamino acid metabolism
## 986                                                 translation
## 117                             cellular response to starvation
## 869                                 signaling by robo receptors
##     over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 890            1.418360e-05                0.9999992          7       29
## 256            1.575643e-05                0.9999991          7       30
## 734            1.751556e-05                0.9999990          7       30
## 255            1.766013e-05                0.9999990          7       31
## 691            1.991804e-05                0.9999988          7       31
## 547            2.074074e-05                0.9999988          7       31
## 808            2.347818e-05                0.9999986          7       32
## 986            2.350223e-05                0.9999983          8       46
## 117            2.692313e-05                0.9999983          7       33
## 869            2.696435e-05                0.9999983          7       33
##             FDR
## 890 0.002755757
## 256 0.002755757
## 734 0.002755757
## 255 0.002755757
## 691 0.002755757
## 547 0.002755757
## 808 0.002755757
## 986 0.002755757
## 117 0.002755757
## 869 0.002755757
## 
## $go_bp
##                    category over_represented_pvalue under_represented_pvalue
## 515 cytoplasmic translation            1.573413e-06                0.9999999
##     numDEInCat numInCat         FDR
## 515          7       33 0.006430541
## 
## $immune
##                           category over_represented_pvalue
## 1974 gse22886 naive tcell vs dc up            4.358409e-07
##      under_represented_pvalue numDEInCat numInCat         FDR
## 1974                        1          7       33 0.002222353
## 
## [1] "set_26"
##  [1] "NOP16"    "WDR3"     "GTPBP10"  "NOP2"     "DKC1"     "MTO1"    
##  [7] "BCAS3"    "PELP1"    "UTP23"    "PHAX"     "ZFP62"    "ZNF431"  
## [13] "ZNF770"   "C9orf114" "ZBTB48"   "RASGRF2"  "SGSM2"    "FAM102A" 
## [19] "DNHD1"   
## $go_bp
##                 category over_represented_pvalue under_represented_pvalue
## 3693 ribosome biogenesis            8.642201e-07                1.0000000
## 1442    ncrna processing            5.031860e-06                0.9999997
##      numDEInCat numInCat         FDR
## 3693          7       35 0.003532067
## 1442          7       42 0.010282606
## 
## [1] "set_30"
##  [1] "ZNRD1"   "MICALL1" "CAPN15"  "POLR2I"  "CD86"    "MTFR1L"  "IFIT3"  
##  [8] "IFIT2"   "UXT"     "HRSP12"  "RSAD2"   "CLSTN3"  "KSR1"    "MED11"  
## [15] "POLR2H"  "IL15"    "TMEM107" "IFIT1"   "TRGV3"  
## $immune
##                                                                                category
## 5059                                   scherer pbmc apsv wetvax age 18 32yo 2 to 4dy up
## 2344                                     gse26030 th1 vs th17 day5 post polarization up
## 3498 gse37533 pparg1 foxp3 vs pparg2 foxp3 transduced cd4 tcell pioglitazone treated dn
## 5061                                   scherer pbmc apsv wetvax age 18 32yo 5 to 7dy up
## 1648                                 gse21360 primary vs quaternary memory cd8 tcell up
## 4652                                        gse7509 dc vs monocyte with fcgriib stim dn
## 4763                                            gse8835 cd4 vs cd8 tcell cll patient up
## 466                                     gse14000 unstim vs 16h lps dc translated rna dn
## 1452      gse19888 adenosine a3r inh vs act with inhibitor pretreatment in mast cell up
## 1317                       gse18281 subcapsular vs central cortical region of thymus dn
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 5059            1.576970e-06                1.0000000          5       12
## 2344            3.135367e-06                0.9999999          6       24
## 3498            4.853328e-06                0.9999998          6       25
## 5061            2.606480e-05                0.9999985          6       34
## 1648            4.127326e-05                0.9999982          5       22
## 4652            5.214184e-05                0.9999976          5       23
## 4763            5.405585e-05                0.9999975          5       23
## 466             6.332070e-05                0.9999970          5       23
## 1452            7.653433e-05                0.9999961          5       25
## 1317            9.103684e-05                0.9999952          5       26
##              FDR
## 5059 0.007993618
## 2344 0.007993618
## 3498 0.008249039
## 5061 0.033226104
## 1648 0.039375824
## 4652 0.039375824
## 4763 0.039375824
## 466  0.040359029
## 1452 0.042095927
## 1317 0.042095927
## 
## [1] "set_34"
##  [1] "PMM1"           "TFDP2"          "C2orf43"        "CCRL2"         
##  [5] "NARG2"          "TMEM241"        "SLC2A8"         "SLC35A5"       
##  [9] "FUOM"           "DRAM2"          "LZTFL1"         "TRIM68"        
## [13] "TMEM134"        "SMIM19"         "SLC9A9"         "TRDC"          
## [17] "RP11-254B13.4"  "BHLHE40-AS1"    "RP11-844P9.3"   "RP11-265N6.3"  
## [21] "MIR3615"        "RP11-686D22.10"
## $immune
##                                                     category
## 4783           gse8921 unstim vs tlr1 2 stim monocyte 12h up
## 2532 gse2770 tgfb and il4 vs il4 treated act cd4 tcell 6h up
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 4783            1.594931e-06                1.0000000          5       16
## 2532            4.743483e-06                0.9999999          5       19
##              FDR
## 4783 0.008132555
## 2532 0.012093510
saveRDS(goseq_res, sprintf("output/gene_set_enrichments_%s.RDS", 
                           file_tag))

Session information

gc()
##            used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells  8958397 478.5   17067254 911.5         NA 17067254 911.5
## Vcells 19166084 146.3   58167881 443.8      65536 78485130 598.8
sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.4.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] TxDb.Hsapiens.UCSC.hg38.knownGene_3.16.0
##  [2] GenomicFeatures_1.50.4                  
##  [3] GenomicRanges_1.50.2                    
##  [4] GenomeInfoDb_1.34.9                     
##  [5] org.Hs.eg.db_3.16.0                     
##  [6] AnnotationDbi_1.60.2                    
##  [7] IRanges_2.32.0                          
##  [8] S4Vectors_0.36.2                        
##  [9] Biobase_2.58.0                          
## [10] BiocGenerics_0.44.0                     
## [11] goseq_1.50.0                            
## [12] geneLenDataBase_1.34.0                  
## [13] BiasedUrn_2.0.10                        
## [14] fgsea_1.24.0                            
## [15] biomaRt_2.54.1                          
## [16] limma_3.54.2                            
## [17] tidyr_1.3.0                             
## [18] ggpubr_0.6.0                            
## [19] ggplot2_3.4.2                           
## [20] data.table_1.14.8                       
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-162                matrixStats_1.0.0          
##  [3] bitops_1.0-7                bit64_4.0.5                
##  [5] filelock_1.0.2              progress_1.2.2             
##  [7] httr_1.4.6                  tools_4.2.3                
##  [9] backports_1.4.1             bslib_0.4.2                
## [11] utf8_1.2.3                  R6_2.5.1                   
## [13] mgcv_1.8-42                 DBI_1.1.3                  
## [15] colorspace_2.1-0            withr_2.5.0                
## [17] tidyselect_1.2.0            prettyunits_1.1.1          
## [19] bit_4.0.5                   curl_5.0.1                 
## [21] compiler_4.2.3              cli_3.6.1                  
## [23] xml2_1.3.4                  DelayedArray_0.24.0        
## [25] rtracklayer_1.58.0          sass_0.4.5                 
## [27] scales_1.2.1                rappdirs_0.3.3             
## [29] Rsamtools_2.14.0            stringr_1.5.0              
## [31] digest_0.6.31               rmarkdown_2.21             
## [33] XVector_0.38.0              pkgconfig_2.0.3            
## [35] htmltools_0.5.5             MatrixGenerics_1.10.0      
## [37] dbplyr_2.3.2                fastmap_1.1.1              
## [39] rlang_1.1.0                 rstudioapi_0.14            
## [41] RSQLite_2.3.1               BiocIO_1.8.0               
## [43] jquerylib_0.1.4             generics_0.1.3             
## [45] jsonlite_1.8.4              BiocParallel_1.32.6        
## [47] dplyr_1.1.2                 car_3.1-2                  
## [49] RCurl_1.98-1.12             magrittr_2.0.3             
## [51] GO.db_3.16.0                GenomeInfoDbData_1.2.9     
## [53] Matrix_1.6-4                Rcpp_1.0.10                
## [55] munsell_0.5.0               fansi_1.0.4                
## [57] abind_1.4-5                 lifecycle_1.0.3            
## [59] stringi_1.7.12              yaml_2.3.7                 
## [61] carData_3.0-5               SummarizedExperiment_1.28.0
## [63] zlibbioc_1.44.0             BiocFileCache_2.6.1        
## [65] grid_4.2.3                  blob_1.2.4                 
## [67] parallel_4.2.3              crayon_1.5.2               
## [69] lattice_0.20-45             splines_4.2.3              
## [71] Biostrings_2.66.0           cowplot_1.1.1              
## [73] hms_1.1.3                   KEGGREST_1.38.0            
## [75] knitr_1.44                  pillar_1.9.0               
## [77] rjson_0.2.21                ggsignif_0.6.4             
## [79] codetools_0.2-19            fastmatch_1.1-3            
## [81] XML_3.99-0.14               glue_1.6.2                 
## [83] evaluate_0.20               png_0.1-8                  
## [85] vctrs_0.6.2                 gtable_0.3.3               
## [87] purrr_1.0.1                 cachem_1.0.7               
## [89] xfun_0.39                   broom_1.0.4                
## [91] restfulr_0.0.15             rstatix_0.7.2              
## [93] tibble_3.2.1                GenomicAlignments_1.34.1   
## [95] memoise_2.0.1